/**************************************************************************
	Replication do-file: "Corruption in Customs"
	Cyril Chalendard, Ana Fernandes, Gael Raballand and Bob Rijkers
	
	Created on: 01/07/2022
**************************************************************************/

clear all
set more off, perm
cap log close
cls

* ----------------------- DIRECTORIES AND FOLDERS ----------------------- *

* Directories in which data are stored
global main "PUT YOUR DIRECTORY PATH HERE"
cd "$folder"

* Folders
global outputdata = "$main\Output Data"
cap mkdir "$main\Tables"
global tables = "$main\Tables"

* ----------------------------- BEGINS HERE ----------------------------- *

* -------
* Dataset
* -------
use "$outputdata\CFRR.dta", clear // open dataset

* Sample
keep if sample_int==1 & eis_f!=. // keep observations of interest


* ---------
* Table A14
* ---------

* Globals
global dep "ln_time fraud c1_dlog_val c1_dlog_tax htrl_f_int"
global controls "tax_rate risk red_i mixed_dec differentiated valitrade_advice"

* Matrix
** Panel with five types of SE
mat tableA14_1 = J(8,5,.)
mat tableA14_1T = J(1,5,.)
** Panels with one independent variable of interest
forvalues x = 2/3 {
	mat tableA14_`x' = J(4,5,.)
	mat tableA14_`x'T = J(1,5,.)
}


* Panel A
* -------

* Loop for estimations
** Robust SE
local i = 1
local j = 1
foreach x in $dep {

	qui: reghdfe `x' eis_f $controls countryorig* max_twodigit* if sample_reg==1 , absorb(insp_f key_dec ym_*) vce(robust)

	mat tableA14_1[`i',`j'] = _b[eis_f]
	local i = `i' + 1
	mat tableA14_1[`i',`j'] = _se[eis_f]
	mat tableA14_1T[1,`j'] = e(N)
	local i = `i' + 6
	mat tableA14_1[`i',`j'] = e(r2)
	local i = `i' -7
	local j = `j' +1
}
** SE clustered by inspector
local j = 1
foreach x in $dep {

	qui: reghdfe `x' eis_f $controls countryorig* max_twodigit* if sample_reg==1 , absorb(insp_f key_dec ym_*) cluster(insp_f)

	mat tableA14_1[3,`j'] = _se[eis_f]
	local j = `j' +1
}
** SE clustered by broker
local j = 1
foreach x in $dep {

	qui: reghdfe `x' eis_f $controls countryorig* max_twodigit* if sample_reg==1 , absorb(insp_f key_dec ym_*) cluster(key_dec)

	mat tableA14_1[4,`j'] = _se[eis_f]
	local j = `j' +1
}
** SE clustered by inspector and broker
local j = 1
foreach x in $dep {

	qui: reghdfe `x' eis_f $controls countryorig* max_twodigit* if sample_reg==1 , absorb(insp_f key_dec ym_*) cluster(insp_f key_dec)

	mat tableA14_1[5,`j'] = _se[eis_f]
	local j = `j' +1
}
** SE clustered by inspector, broker and semester
local j = 1
foreach x in $dep {

	qui: reghdfe `x' eis_f $controls countryorig* max_twodigit* if sample_reg==1 , absorb(insp_f key_dec ym_*) cluster(insp_f key_dec nc_sem)

	mat tableA14_1[6,`j'] = _se[eis_f]
	local j = `j' +1
}


* Panel B
* -------

* Dataset
preserve
keep if sample_reg==1 // keep observations of interest
collapse (mean) eis_f $dep $controls (max) max_twodigit* countryorig* , by(key_cuo nc_sem insp_f key_dec) // collapse at the semester-inspector-broker level

* Loop for estimations
local i = 1
local j = 1
foreach x in $dep {

	qui: reghdfe `x' eis_f $controls countryorig* max_twodigit* , absorb(insp_f nc_sem key_dec) vce(cluster insp_f key_dec)

	mat tableA14_2[`i',`j'] = _b[eis_f]
	local i = `i' + 1
	mat tableA14_2[`i',`j'] = _se[eis_f]
	mat tableA14_2T[1,`j'] = e(N)
	local i = `i' + 2
	mat tableA14_2[`i',`j'] = e(r2)
	local i = `i' -3
	local j = `j' +1
}

restore


* Panel C
* -------

* Dataset
preserve
egen brmean = mean(sig_c_f) if sample_reg==1 , by(key_dec nc_sem) // calculate mean of excess interaction by semester-broker
cap drop n
bys bs: gen n = _n // create index for semester-broker pair
sort nc_sem brmean // sort observations
egen rankb = rank(brmean) if sample_reg==1 & n==1 , by(nc_sem) field // rank mean by semester
egen brrank = mean(rankb) , by(bs) // calculate the mean of the rank by semester-broker
drop brmean rankb n bs // drop unnecessary variables

* Loop for estimations
local i = 1
local j = 1
foreach x in $dep {

	qui: reghdfe `x' eis_f $controls countryorig* max_twodigit* if sample_reg==1 & brrank>5 , absorb(insp_f key_dec ym_*) vce(cluster key_dec insp_f)

	mat tableA14_3[`i',`j'] = _b[eis_f]
	local i = `i' + 1
	mat tableA14_3[`i',`j'] = _se[eis_f]
	mat tableA14_3T[1,`j'] = e(N)
	local i = `i' + 2
	mat tableA14_3[`i',`j'] = e(r2)
	local i = `i' -3
	local j = `j' +1
}

restore


* Export
* ------
putexcel set "$tables\Table A14.xlsx", replace sheet(TableA14) // create a new excel spreadsheet

* Title
putexcel (A1:F1), merge hcenter vcenter
putexcel A1 = "Before delegated randomization of inspector assignment"

* Dependent variables
putexcel A2 = "Dependent variable:", left vcenter bold
local cells `" "B" "C" "D" "E" "F" "'
local labels `" "Time" "Fraud" "\Delta log value" "\Delta log tax" "Hyp. tax revenue losses" "'
forvalues i = 1(1)5 {

	local x: word `i' of `labels'
	local y: word `i' of `cells'

	putexcel `y'2 = "`x'", hcenter vcenter bold	
}

* Panel's Names
local cells `" "4" "15" "22" "'
local labels `" "A. Different Types of Clusters for Standard Errors" "B. Data Collapsed at the Broker-Inspector-Semester Level" "C. Dropping Top 5 Brokers with the Largest Share of Declarations with Excess Interaction Each Semester" "'
forvalues i = 1(1)3 {

	local x: word `i' of `labels'
	local y: word `i' of `cells'

	putexcel (A`y':F`y'), merge hcenter vcenter bold
	putexcel A`y' = "`x'"
}

* Panel A
** Independent variables
local j = 6
local labels `" "Excess interaction share" "Robust SE" "SE clustered by inspector" "SE clustered by broker" "SE clustered by broker and inspector (baseline)" "SE three-way clustered by inspector, broker, and semester" "Observations" "R-squared" "'
forvalues i = 1(1)8 {

	local x: word `i' of `labels'

	putexcel A`j' = "`x'", left vcenter
	local j = `j'+1
}
** Coefficients
putexcel B6 = matrix(tableA14_1), nformat(0.000) hcenter vcenter
putexcel B12 = matrix(tableA14_1T), nformat(number_sep) hcenter vcenter

* Panels B and C
** Coefficients
local j = 2
forvalues i = 17(7)24 {

	putexcel A`i' = "Excess interaction share", left vcenter
	putexcel B`i' = matrix(tableA14_`j'), nformat(0.000) hcenter vcenter
	local j = `j'+1
}
** Observations
local j = 2
forvalues i = 19(7)26 {

	putexcel A`i' = "Observations", left vcenter
	putexcel B`i' = matrix(tableA14_`j'T), nformat(number_sep) hcenter vcenter
	local j = `j'+1
}
** R2
forvalues i = 20(7)27 {
	putexcel A`i' = "R-squared", left vcenter
}

* Column numbers
local cells `" "B" "C" "D" "E" "F" "B" "C" "D" "E" "F" "B" "C" "D" "E" "F" "'
forvalues i = 1(1)15 {

	local y: word `i' of `cells'

	if `i' < 6 {
		putexcel `y'5 = "(`i')", hcenter vcenter
	}

	if `i' > 5 & `i' < 11 {
		putexcel `y'16 = "(`i')", hcenter vcenter
	}

	if `i' > 10 {
		putexcel `y'23 = "(`i')", hcenter vcenter
	}
}


* -------------------------------- ENDS HERE -------------------------------- *